package com.xq.chrome.utils;

import org.codehaus.plexus.util.Base64;
import org.codehaus.plexus.util.StringUtils;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import javax.servlet.http.HttpServletRequest;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Utility class for the Yeekit web-page translation proxy service.
 *
 * <p>Fetches a remote site's root page, injects a {@code <base>} tag (and,
 * for {@link #getUrlMap(String, String)}, the hover-translation scripts) so
 * that the page can be served back through the proxy, and offers a few small
 * helpers for download file names and {@code <img>} source extraction.
 *
 * @author zhanglongping
 * @CreateDate: 2016-8-23 10:15:08
 * @UpdateUser: zhanglongping
 * @UpdateDate: 2016-8-23 10:15:08
 */
public class ProxyUtils {

    /** Fully qualified so that the file's import block does not need to change. */
    private static final java.util.logging.Logger LOGGER =
            java.util.logging.Logger.getLogger(ProxyUtils.class.getName());

    /**
     * Matches an {@code <img ...>} tag in any of the forms {@code <img/>},
     * {@code <img></img>} or {@code <img>}; group 1 is the attribute text.
     * Case-insensitive, and attributes may span several lines.
     */
    private static final Pattern IMG_TAG =
            Pattern.compile("<img\\b(.*?)/?>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /**
     * Matches a quoted {@code src} attribute; group 2 is the value. The
     * look-behind rejects attributes that merely end in "src" (such as
     * {@code data-src}), and the back-reference requires the closing quote to
     * be the same character as the opening one.
     */
    private static final Pattern SRC_ATTR =
            Pattern.compile("(?<![\\w-])src\\s*=\\s*([\"'])(.*?)\\1",
                    Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /**
     * Fetches the root page of the site that {@code url} points to (HTTP GET)
     * and returns its HTML with a {@code <base>} tag and the hover-translation
     * scripts injected.
     *
     * <p>The {@code <base>} tag is placed before the first {@code <meta>}
     * element and the scripts after it; when the page has no {@code <meta>}
     * element they are all prepended to {@code <head>} instead.
     *
     * <p>NOTE(review): only protocol, host and port of {@code url} are used;
     * the path and query are ignored, so the site root is always fetched.
     * Confirm that this is intended.
     *
     * @param url      address of the site to fetch
     * @param basePath base path of this application, used to build the URL of
     *                 the hover-translation script
     * @return the modified HTML, or {@code null} if the URL is malformed or
     *         the page could not be fetched
     * @author zhanglongping
     * @date 2016-8-23 10:42:41
     */
    public String getUrlMap(String url,String basePath){
        try {
            // Some sites need their address rewritten before they can be fetched.
            String hostname = originOf(new URL(transformation(url)));

            Connection conn = Jsoup.connect(hostname);
            Document page = conn.get();
            page.setBaseUri(hostname);

            // Scripts that implement translate-on-hover in the proxied page.
            String hoverJs = "<script src=\""+basePath+"/yeekit_translate_url/js/yeekit_hover_trans.js\" type=\"text/javascript\"></script>";
            String jqueryJs = "<script src=\"http://cdn.bootcss.com/jquery/3.1.0/jquery.min.js\" type=\"text/javascript\"></script>";

            insertAroundFirstMeta(page, baseTag(hostname), jqueryJs + hoverJs);
            return page.html();
        } catch (IOException e) {
            // MalformedURLException is an IOException, so bad URLs end up here too.
            LOGGER.log(java.util.logging.Level.WARNING, "Failed to fetch page for " + url, e);
            return null;
        }
    }

    /**
     * Fetches the root page of the site that {@code url} points to and returns
     * its HTML with a {@code <base>} tag injected before the first
     * {@code <meta>} element (or at the start of {@code <head>} when the page
     * has no {@code <meta>} element).
     *
     * <p>NOTE(review): the request is sent with HTTP POST, not GET, and only
     * protocol, host and port of {@code url} are used. Confirm both are
     * intended.
     *
     * @param url address of the site to fetch
     * @return the modified HTML
     * @throws IOException if the URL is malformed or the page cannot be fetched
     * @author zhanglongping
     * @date 2016-8-30 17:44:31
     */
    public String get_https_html(String url) throws IOException{
        String hostname = originOf(new URL(url));

        Connection conn = Jsoup.connect(hostname);
        Document page = conn.post();
        page.setBaseUri(hostname);

        insertAroundFirstMeta(page, baseTag(hostname), "");
        return page.html();
    }

    /**
     * Encodes a download file name so that non-ASCII (e.g. Chinese) names are
     * not garbled, choosing the encoding from the request's
     * {@code USER-AGENT} header.
     *
     * <ul>
     *   <li>Firefox: Base64 of the UTF-8 bytes wrapped as
     *       {@code =?UTF-8?B?...?=}</li>
     *   <li>Chrome: the UTF-8 bytes re-read as ISO-8859-1 characters</li>
     *   <li>anything else (IE7+): URL-encoded UTF-8 with {@code +} replaced
     *       by {@code %20}</li>
     *   <li>no {@code USER-AGENT} header: the name is returned unchanged</li>
     * </ul>
     *
     * @param request   current request, used only to read the user agent
     * @param pFileName file name to encode; {@code null} yields {@code null}
     * @return the encoded file name
     * @throws UnsupportedEncodingException declared by the charset-by-name
     *         APIs used here
     */
    public static String encodeChineseDownloadFileName(HttpServletRequest request, String pFileName) throws UnsupportedEncodingException{
        if (pFileName == null) {
            return null;
        }
        String agent = request.getHeader("USER-AGENT");
        if (agent == null) {
            return pFileName;
        }
        if (agent.contains("Firefox")) {
            byte[] base64 = Base64.encodeBase64(pFileName.getBytes("UTF-8"));
            return "=?UTF-8?B?" + new String(base64, "US-ASCII") + "?=";
        }
        if (agent.contains("Chrome")) {
            // Use explicit UTF-8: the platform default charset would garble
            // the name on JVMs whose default is not UTF-8.
            return new String(pFileName.getBytes("UTF-8"), "ISO8859-1");
        }
        // IE7+ and everything else: URL-encode, then turn the "+" that
        // URLEncoder emits for spaces into "%20".
        String encoded = URLEncoder.encode(pFileName, "UTF-8");
        return StringUtils.replace(encoded, "+", "%20");
    }

    /**
     * Extracts the {@code src} value of every {@code <img>} tag in
     * {@code content}, in document order.
     *
     * <p>Tags without a quoted {@code src} attribute are skipped; only the
     * first {@code src} of each tag is reported. This is a regex scan, not an
     * HTML parse, so a {@code >} inside an attribute value ends the tag early.
     *
     * @param content HTML text to scan; {@code null} is treated as empty
     * @return the {@code src} values found, never {@code null}
     */
    public  List<String> getImgSrc(String content){
        List<String> sources = new ArrayList<String>();
        if (content == null) {
            return sources;
        }
        Matcher tag = IMG_TAG.matcher(content);
        while (tag.find()) {
            Matcher src = SRC_ATTR.matcher(tag.group(1));
            if (src.find()) {
                sources.add(src.group(2));
            }
        }
        return sources;
    }

    /**
     * Rewrites addresses that are known not to work as given.
     *
     * <p>Currently only {@code http://www.baidu.com} is rewritten (to
     * {@code http://baidu.com}) because redirection of the {@code www}
     * sub-domain is problematic.
     *
     * @param url address to check; may be {@code null}
     * @return the rewritten address, or {@code url} itself when no rule applies
     * @author zhanglongping
     * @date 2016-8-30 18:18:48
     */
    public String transformation(String url){
        // Constant-first equals keeps a null url from throwing.
        if ("http://www.baidu.com".equals(url)) {
            return "http://baidu.com";
        }
        return url;
    }

    /** Builds {@code protocol://host[:port]} from a parsed URL, keeping an explicit port. */
    private static String originOf(URL parsed) {
        StringBuilder origin = new StringBuilder();
        origin.append(parsed.getProtocol()).append("://").append(parsed.getHost());
        if (parsed.getPort() != -1) {
            origin.append(':').append(parsed.getPort());
        }
        return origin.toString();
    }

    /** Builds the {@code <base>} tag that makes relative links resolve against {@code origin}. */
    private static String baseTag(String origin) {
        return "<base href=\""+origin+"/"+"\" />";
    }

    /**
     * Inserts {@code leadingHtml} before and {@code trailingHtml} after the
     * first {@code <meta>} element; if the page has none, both are prepended
     * to {@code <head>} in that order.
     */
    private static void insertAroundFirstMeta(Document page, String leadingHtml, String trailingHtml) {
        Elements metas = page.select("meta");
        if (metas.isEmpty()) {
            page.head().prepend(leadingHtml + trailingHtml);
            return;
        }
        metas.get(0).before(leadingHtml);
        if (trailingHtml.length() > 0) {
            metas.get(0).after(trailingHtml);
        }
    }
}